package com.yeming.flink.practice.source
import org.apache.flink.streaming.api.scala._

/**
 * Flink streaming word-count job that reads a text file from HDFS,
 * splits each line into whitespace-separated words, and prints a
 * running count per word.
 *
 * Usage: the first program argument, if present, overrides the input
 * path; otherwise the original default HDFS path is used.
 */
object HdfsFileSource {

  def main(args: Array[String]): Unit = {
    // Initialize the streaming execution environment.
    val streamEnv: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Use a single task slot so printed output is not interleaved across subtasks.
    streamEnv.setParallelism(1)

    // Source: read the HDFS file; allow the path to be supplied as args(0),
    // falling back to the original hard-coded location for compatibility.
    val inputPath: String = args.headOption.getOrElse("hdfs://f1:9000/wc.txt")
    val data: DataStream[String] = streamEnv.readTextFile(inputPath)

    // Transformation: tokenize, pair each word with 1, key by the word
    // itself (field-expression keyBy — the tuple-index form keyBy(0) is
    // deprecated in the Scala DataStream API), and keep a running sum.
    val wordCounts: DataStream[(String, Int)] = data
      .flatMap(_.split(" "))
      .map((_, 1))
      .keyBy(_._1)
      .sum(1)

    // Sink: emit each updated (word, count) record to stdout.
    wordCounts.print()

    // Submit the job; execution is lazy until this call.
    streamEnv.execute("HdfsFileWordcount")
  }

}
